{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Watch Agents!"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Init Red and Blue  Agents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:unityagents:\n",
      "'Academy' started successfully!\n",
      "Unity Academy name: Academy\n",
      "        Number of Brains: 1\n",
      "        Number of External Brains : 1\n",
      "        Lesson number : 0\n",
      "        Reset Parameters :\n",
      "\t\t\n",
      "Unity brain name: TennisBrain\n",
      "        Number of Visual Observations (per agent): 0\n",
      "        Vector Observation space type: continuous\n",
      "        Vector Observation space size (per agent): 8\n",
      "        Number of stacked Vector Observation: 3\n",
      "        Vector Action space type: continuous\n",
      "        Vector Action space size (per agent): 2\n",
      "        Vector Action descriptions: , \n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import torch\n",
    "\n",
    "import sys\n",
    "import os\n",
    "\n",
    "sys.path.append(os.path.abspath('python/'))\n",
    "\n",
    "from unityagents import UnityEnvironment\n",
    "# MADDPG wrapper\n",
    "from maddpg_agent import maddpg_agent\n",
    "\n",
    "maddpg = maddpg_agent()\n",
    "env = UnityEnvironment(seed=0, file_name=\"Tennis_Windows_x86_64/Tennis.app\")\n",
    "\n",
    "# get the default brain\n",
    "brain_name = env.brain_names[0]\n",
    "brain = env.brains[brain_name]\n",
    "\n",
    "env_info = env.reset(train_mode=False)[brain_name]\n",
    "\n",
    "# number of agents\n",
    "num_agents = len(env_info.agents)                      \n",
    "\n",
    "# size of each action\n",
    "action_size = brain.vector_action_space_size"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. Play Before Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Score (max over agents) from episode 1: 0.0\n",
      "Score (max over agents) from episode 2: 0.09000000171363354\n",
      "Score (max over agents) from episode 3: 0.0\n",
      "Score (max over agents) from episode 4: 0.0\n",
      "Score (max over agents) from episode 5: 0.0\n",
      "Score (max over agents) from episode 6: 0.0\n",
      "Score (max over agents) from episode 7: 0.0\n"
     ]
    }
   ],
   "source": [
    "for i in range(1, 8):                                      # play game for 5 episodes\n",
    "    env_info = env.reset(train_mode=False)[brain_name]     # reset the environment         \n",
    "    states = env_info.vector_observations                  # get the current state (for each agent)\n",
    "    scores = np.zeros(num_agents)                          # initialize the score (for each agent)\n",
    "    \n",
    "    while True:\n",
    "        actions = np.random.randn(num_agents, action_size) # select an action (for each agent)\n",
    "        actions = np.clip(actions, -1, 1)                  # all actions between -1 and 1\n",
    "        env_info = env.step(actions)[brain_name]           # send all actions to tne environment\n",
    "        next_states = env_info.vector_observations         # get next state (for each agent)\n",
    "        rewards = env_info.rewards                         # get reward (for each agent)\n",
    "        dones = env_info.local_done                        # see if episode finished\n",
    "        scores += env_info.rewards                         # update the score (for each agent)\n",
    "        states = next_states                               # roll over states to next time step\n",
    "        \n",
    "        if np.any(dones):                                  # exit loop if episode finished\n",
    "            break\n",
    "    \n",
    "    print('Score (max over agents) from episode {}: {}'.format(i, np.max(scores)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Load weights and Play "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load(dir):    \n",
    "        for i in range(num_agents):\n",
    "            maddpg.agents[i].actor_local.load_state_dict(\n",
    "                torch.load(os.path.join(dir, 'checkpoint_actor_{}.pth'.format(i))) )\n",
    "            maddpg.agents[i].critic_local.load_state_dict(\n",
    "                torch.load(os.path.join(dir, 'checkpoint_critic_{}.pth'.format(i))) )\n",
    "\n",
    "\n",
    "def play(maddpg, env, num_games=11):\n",
    "    \"\"\"Tests the training results by having both agents play a match.\n",
    "    Params\n",
    "    ======\n",
    "        maddpg (MADDPG): instance of MADDPG wrapper class\n",
    "        env (UnityEnvironment): instance of Unity environment for testing\n",
    "        num_games (int): number of games to be played\n",
    "    \"\"\"\n",
    "    \n",
    "    print(\"Agent #0: Red racket\")\n",
    "    print(\"Agent #1: Blue racket\")\n",
    "    print(\"---------------------\")\n",
    "\n",
    "    game_scores = [0 for _ in range(num_agents)]\n",
    "\n",
    "    # Environment information\n",
    "    brain_name = env.brain_names[0]\n",
    "\n",
    "    for i_episode in range(1, num_games+1):\n",
    "        env_info = env.reset(train_mode=False)[brain_name]   \n",
    "        states = env_info.vector_observations\n",
    "        scores = np.zeros(num_agents)\n",
    "\n",
    "        t_step = 0\n",
    "        \n",
    "        while True:\n",
    "            actions = maddpg.act(states)\n",
    "\n",
    "            env_info = env.step(actions)[brain_name]\n",
    "            next_states = env_info.vector_observations\n",
    "            rewards = env_info.rewards\n",
    "            scores += rewards\n",
    "            dones = env_info.local_done\n",
    "            t_step += 1\n",
    "\n",
    "            if np.any(dones):\n",
    "                winner = np.argmax(scores)\n",
    "                game_scores[winner] += 1\n",
    "                if (t_step > 0):\n",
    "                    t_step -= 1\n",
    "                print('Game: {}, partial score: {},  Score #0: {:.2f}, Score #1: {:.2f}, Timesteps: {} '.\n",
    "                       format(i_episode, game_scores, scores[0], scores[1], t_step))\n",
    "                break\n",
    "\n",
    "            states = next_states\n",
    "\n",
    "    print(\"---------------------\")\n",
    "    print(\"Winner: Agent #{}\".format(np.argmax(game_scores)))\n",
    "  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4. Play after training in 1600 games"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Agent #0: Red racket\n",
      "Agent #1: Blue racket\n",
      "---------------------\n",
      "Game: 1, partial score: [1, 0],  Score #0: 2.60, Score #1: 2.60, Timesteps: 1000 \n",
      "Game: 2, partial score: [2, 0],  Score #0: 2.70, Score #1: 2.60, Timesteps: 1000 \n",
      "Game: 3, partial score: [3, 0],  Score #0: 0.20, Score #1: 0.19, Timesteps: 104 \n",
      "Game: 4, partial score: [3, 1],  Score #0: 0.09, Score #1: 0.10, Timesteps: 50 \n",
      "Game: 5, partial score: [4, 1],  Score #0: 2.60, Score #1: 2.60, Timesteps: 1000 \n",
      "---------------------\n",
      "Winner: Agent #0\n"
     ]
    }
   ],
   "source": [
    "dir_chkpoints = 'dir_chk_1600b_episodes'\n",
    "load(dir_chkpoints)\n",
    "play(maddpg, env, num_games=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5. Play after training in 1700 games"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Agent #0: Red racket\n",
      "Agent #1: Blue racket\n",
      "---------------------\n",
      "Game: 1, partial score: [1, 0],  Score #0: 2.70, Score #1: 2.60, Timesteps: 1000 \n",
      "Game: 2, partial score: [2, 0],  Score #0: 2.60, Score #1: 2.60, Timesteps: 1000 \n",
      "Game: 3, partial score: [3, 0],  Score #0: 0.10, Score #1: 0.09, Timesteps: 37 \n",
      "Game: 4, partial score: [3, 1],  Score #0: -0.01, Score #1: 0.10, Timesteps: 30 \n",
      "Game: 5, partial score: [4, 1],  Score #0: 2.60, Score #1: 2.60, Timesteps: 1000 \n",
      "---------------------\n",
      "Winner: Agent #0\n"
     ]
    }
   ],
   "source": [
    "dir_chkpoints = 'dir_chk_1700d_episodes'\n",
    "load(dir_chkpoints)\n",
    "play(maddpg, env, num_games=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "env.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
